# "Media's Influence on LGBTQ Support Across Africa" by Stephen Winkler

# Code to generate descriptive findings, including:
#   - Table A.2 in Appendix (correlation matrix)
#   - Table A.1 in Appendix (descriptive stats of main vars)
#   - Figure 1 in Main Text (distribution of dependent var by country)
#   - Figure A.1a-b in Appendix (distribution of ordinal and binary dependent var)
#   - Figure A.2 in Appendix (distribution of binary dependent var by country)
#   - Figure A.3a-f in Appendix (distribution of independent vars)
#   - Table 1 in Main Text (changes in media consumption R5->R6)

rm(list=ls()) # clear environment
source("Rprofile.R") # setup Rprofile (load packages, etc.)

myData <- readRDS("data/afrobarometer.rds")

# Convert vars to numeric
myData$radio <- as.numeric(myData$radio)
myData$tv <- as.numeric(myData$tv)
myData$newspaper <- as.numeric(myData$newspaper)
myData$internet <- as.numeric(myData$internet)
myData$social_media <- as.numeric(myData$social_media)
myData$media_aggregate <- as.numeric(myData$media_aggregate)
myData$sexuality <- as.numeric(myData$sexuality)
myData$sexuality2 <- as.numeric(myData$sexuality2)

# Correlation Matrix of Media Consumption
media_data <- dplyr::select(myData, radio, tv, newspaper,
                     internet, social_media)
media_cov_table <-
  cor(media_data, use = "pairwise.complete.obs" , method = "pearson" )

# Generate Latex Table for Appendix
# This is Table 4 in Appendix
stargazer(media_cov_table, title="Pearson Correlation Matrix of Media Consumption") # latex output

# Descriptive Stats of Main Vars
my_vars <- c("sexuality2",
             "media_aggregate", "radio", "tv", "newspaper", "internet", "social_media",
             "female", "education", "religiosity", "age", "income_water", "urban",
             "relig_tol2", "ethnic_tol2", "hiv_tol2", "immig_tol2",
             "fh_scale", "KOFSoGI"
             )
data_subset <- myData[,c("sexuality2",
                         "relig_tol2", "ethnic_tol2", "hiv_tol2", "immig_tol2",
                          "media_aggregate", "radio", "tv", "newspaper", "internet", "social_media",
                          "female", "education", "religiosity", "age", "income_water", "urban",
                          "fh_scale", "KOFSoGI")
                      ]
data_subset$sexuality2 <- as.numeric(data_subset$sexuality2)
data_subset$female <- as.numeric(data_subset$female)
data_subset$education <- as.numeric(data_subset$education)
data_subset$urban <- as.numeric(data_subset$urban)
data_subset$religiosity <- as.numeric(data_subset$religiosity)
data_subset$age <- as.character(data_subset$age) #set age to character first
data_subset$age <- as.numeric(data_subset$age) # then convert to numeric so that it doesn't relevel at 0
data_subset$income_water <- as.numeric(data_subset$income_water)
data_subset$relig_tol2 <- as.numeric(data_subset$relig_tol2)
data_subset$ethnic_tol2 <- as.numeric(data_subset$ethnic_tol2)
data_subset$hiv_tol2 <- as.numeric(data_subset$hiv_tol2)
data_subset$immig_tol2 <- as.numeric(data_subset$immig_tol2)

# Summary Table Statistics Function
stat_sum <- function(vec) {
  out <- c (min(vec, na.rm=T),  max(vec, na.rm=T), mean(vec, na.rm=T), sd(vec, na.rm=T))
  return(out)
}

# Apply function to stats
tab_sum <- round(t( apply(data_subset, 2, stat_sum)), 2)

# Label Columns 
colnames(tab_sum) <- c("Min", "Max", "Mean", "StDev")

# Label Rows w var names
row.names(tab_sum) <- c("LGBTQ tolerance", "Religous tolerance", "Ethnic tolerance", "HIV+ tolerance",
                        "Immigrant tolerance", "Aggregate media consumption", "Radio consumption", "TV consumption",
                        "Newspaper consumption", "Internet consumption", "Social media consumption", "Sex (1=female)",
                        "Education level", "Religiosity", "Age", "Water access", "Urban", "Freedom House scale", "KOF Score")

# Generate Latex Table for Appendix
# This is Table 3 in Appendix 
xtable(tab_sum, caption = "Descriptive Statistics of Primary Variables",
       align = "lllll") 

# Distribution of dependent variable
# Create dot plot for percent who dislike having homosexual neighbor
# (Figure 1 in Main Text)

myData_subset <- subset(myData, myData$sexuality2!="NA") #get rid of countries that don't have data

# Turn lgbt into binary for DISLIKE lgbt 
myData_subset$sexuality2 <- as.numeric(myData_subset$sexuality2)
myData_subset$dislike_lgbt <- 1
myData_subset$dislike_lgbt[myData_subset$sexuality2>0] <- 0

# make "all Africa" country
x <- myData_subset
x$country <- "Africa Average"
myData_subset <- rbind(myData_subset, x)

afro_data <- split(myData_subset, myData_subset$country)

# function for mean response by country
conf_int_2 <- function(data) {
  n = length(data$dislike_lgbt) 
  sigma = sd(data$dislike_lgbt) 
  sem = sigma/sqrt(n)
  E = qnorm(.975)*sem   
  xbar = mean(data$dislike_lgbt)   # sample mean
  xbar + c(-E, 0, E)
}

y <- lapply(afro_data, conf_int_2)
data_out <- t(bind_cols(y))
afro2 <- data.frame( rownames(data_out), data.frame(data_out, row.names=NULL), stringsAsFactors = F)
colnames(afro2) <- c("country", "lower", "perc_dislike", "upper")

# check output
all( y[[1]] == afro2[1,2:4] )
all( y[[34]] == afro2[34,2:4] )

# impose order on factor variable
afro2$country <- factor(afro2$country, levels = afro2$country[rev(order(afro2$perc_dislike))])

# Create plot for main text 
dv_ctry_plot <- 
  ggplot(afro2, aes(x=perc_dislike, y=country)) +
  geom_point(size=3, shape = 18 ) +
  scale_x_continuous(labels = scales::percent, breaks = c(.3,.4,.5,.6,.7,.8,.9,1.0)) +
  geom_hline(yintercept = 26, size = .3, linetype = "dashed") +
  theme_bw() + theme(axis.line.x = element_blank(),
                     axis.line.y = element_blank(),
                     panel.grid.major.x = element_blank(),
                     panel.grid.major.y = element_line(color = "grey"),
                     panel.grid.minor = element_blank(),
                     panel.border = element_blank(),
                     panel.background = element_blank(),
                     plot.title = element_text(hjust = 0.5)) +
  labs(x = "% in country who dislike homosexuality",
       y = "") 

ggsave(filename = "figures/dv_ctry_plot.pdf", # save in directory 
       plot = dv_ctry_plot,
       width = 8, height = 6.5)

# Dist. of Ordinal DV
# Figure 5a in Appendix 
dv_plot1 <- 
  ggplot(data = subset(myData, !is.na(sexuality)), aes(x = sexuality)) + 
  geom_bar() +
  xlab("") + 
  theme(plot.title = element_text(hjust = 0.5)) + theme_bw()
ggsave(filename = "figures/dv_plot1.pdf", plot = dv_plot1) #Save it to directory

# Dist of Binary DV
# Figure 5b in Appendix 
dv_plot2 <- 
  ggplot(data = subset(myData, !is.na(sexuality2)), aes(x = sexuality2)) + 
  geom_bar() + scale_x_discrete(labels = c("Dislike", "Don't Care / Like")) +
  xlab("") + 
  theme(plot.title = element_text(hjust = 0.5)) + theme_bw()
ggsave(filename = "figures/dv_plot2.pdf", plot = dv_plot2)

# Dist of Binary by country
# Figure 6 in Appendix 
dv_plot3 <- 
  dv_plot2 +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  facet_wrap("country")
ggsave(filename = "figures/dv_plot3.pdf", height = 6, plot = dv_plot3)

# Ordinal Distribution of Radio, TV, Newspaper, Internet, Social Media 
# Figure 7a-f in Appendix 
iv_plot0 <-
  ggplot(data = subset(myData, !is.na(media_aggregate)), aes(x = media_aggregate)) +
  geom_bar() + 
  xlab("") +
  theme(plot.title = element_text(hjust = 0.5)) + theme_bw()
ggsave(filename = "figures/iv_plot0.pdf", plot = iv_plot0)

iv_plot1 <-
  ggplot(data = subset(myData, !is.na(radio)), aes(x = radio)) +
  geom_bar() + 
  xlab("") +
  theme(plot.title = element_text(hjust = 0.5)) + theme_bw()
ggsave(filename = "figures/iv_plot1.pdf", plot = iv_plot1)

iv_plot2 <-
  ggplot(data = subset(myData, !is.na(tv)), aes(x = tv)) +
  geom_bar() + 
  xlab("") +
  theme(plot.title = element_text(hjust = 0.5)) + theme_bw()
ggsave(filename = "figures/iv_plot2.pdf", plot = iv_plot2)

iv_plot3 <-
  ggplot(data = subset(myData, !is.na(newspaper)), aes(x = newspaper)) +
  geom_bar() + 
  xlab("") +
  theme(plot.title = element_text(hjust = 0.5)) + theme_bw()
ggsave(filename = "figures/iv_plot3.pdf", plot = iv_plot3)

iv_plot4 <-
  ggplot(data = subset(myData, !is.na(internet)), aes(x = internet)) +
  geom_bar() + 
  xlab("") +
  theme(plot.title = element_text(hjust = 0.5)) + theme_bw()
ggsave(filename = "figures/iv_plot4.pdf", plot = iv_plot4)

iv_plot5 <-
  ggplot(data = subset(myData, !is.na(social_media)), aes(x = social_media)) +
  geom_bar() + 
  xlab("") +
  theme(plot.title = element_text(hjust = 0.5)) + theme_bw()
ggsave(filename = "figures/iv_plot5.pdf", plot = iv_plot5)

# Average Number of Districts per country
myData_clusters <- 
  myData %>%
  dplyr::select(country, district) %>%
  group_by(country) %>%
  filter(country != "Algeria") %>% # Remove countries that are not in model
  filter(country != "Egypt") %>%
  filter(country != "Sudan") %>%
  summarise(length(unique(district))) 

new_names <- c("country", "district")
colnames(myData_clusters) <- new_names
myData_clusters$district <- as.numeric(myData_clusters$district)
mean(myData_clusters$district)

# Average Number of Respondents per District
myData_districts_indiv <- 
  myData %>%
  dplyr::select(country, district, respno) %>%
  group_by(country, district ) %>%
  filter(country != "Algeria") %>% # Remove countries that are not in model
  filter(country != "Egypt") %>%
  filter(country != "Sudan") %>%
  summarise(length(unique(respno))) 

new_names <- c("country", "district", "respno")
colnames(myData_districts_indiv) <- new_names
myData_districts_indiv$respno <- as.numeric(myData_districts_indiv$respno)
summary(myData_districts_indiv$respno)

# Media Consumption Rates R5 & R6
# To create Table 1 in main text 

# Need to re-load R6 data and not convert to numeric 
myData <- readRDS("data/afrobarometer.rds")

# Load raw round 5 data 
AFROBAROMETER_FILE <- project_path("data-raw", "afrobarometer",
                                   "merged-round-5-data-34-countries-2011-2013-last-update-july-2015.sav")

# Clean Round 5 Afrobarometer Data
# - All Afrobarometer variables are by defalt upper-cased, so make lower-case
# - Since this is SPSS, convert the labelled data to actual factors.
#    See the haven documentation for the difference
# 
round_5 <-
  read_spss(AFROBAROMETER_FILE) %>%
  {set_names(., str_to_lower(names(.)))} %>%
  mutate_if(.predicate = is.labelled, .funs = funs(as_factor))

# Radio q13a
round_5 %<>%
  mutate(radio = as.factor(q13a),
         radio = fct_recode(radio,
                            "NA" = "Missing",
                            "Never" = "Never",
                            "Less than once a month" = "Less than once a month",
                            "A few times a month" = "A few times a month",
                            "A few times a week" = "A few times a week",
                            "Every day" = "Every day",
                            "NA" = "Don't know",
                            "NA" = "Refused")) %>%
  replace_with_na(replace = list(radio = "NA")) %>% # convert character NA to actual NA
  droplevels(round_5$radio) #drop unused level 

# TV q13b
round_5 %<>%
  mutate(tv = as.factor(q13b),
         tv = fct_recode(tv,
                            "NA" = "Missing",
                            "Never" = "Never",
                            "Less than once a month" = "Less than once a month",
                            "A few times a month" = "A few times a month",
                            "A few times a week" = "A few times a week",
                            "Every day" = "Every day",
                            "NA" = "Don't know",
                            "NA" = "Refused")) %>%
  replace_with_na(replace = list(tv = "NA")) %>% # convert character NA to actual NA
  droplevels(round_5$tv) #drop unused level 

# Newspaper q13c
round_5 %<>%
  mutate(newspaper = as.factor(q13c),
         newspaper = fct_recode(newspaper,
                            "NA" = "Missing",
                            "Never" = "Never",
                            "Less than once a month" = "Less than once a month",
                            "A few times a month" = "A few times a month",
                            "A few times a week" = "A few times a week",
                            "Every day" = "Every day",
                            "NA" = "Don't know",
                            "NA" = "Refused")) %>%
  replace_with_na(replace = list(newspaper = "NA")) %>% # convert character NA to actual NA
  droplevels(round_5$newspaper) #drop unused level 

# Internet q13d
round_5 %<>%
  mutate(internet = as.factor(q13d),
         internet = fct_recode(internet,
                            "NA" = "Missing",
                            "Never" = "Never",
                            "Less than once a month" = "Less than once a month",
                            "A few times a month" = "A few times a month",
                            "A few times a week" = "A few times a week",
                            "Every day" = "Every day",
                            "NA" = "Don't know",
                            "NA" = "Refused")) %>%
  replace_with_na(replace = list(internet = "NA")) %>% # convert character NA to actual NA
  droplevels(round_5$internet) #drop unused level 

# Create table with round 5 media consumption rates
round_5_media <- dplyr::select(round_5, radio, tv, newspaper, internet)

r5.radio <- length(which(round_5_media$radio != "Never")) / length(round_5_media$radio)
r5.tv <- length(which(round_5_media$tv != "Never")) / length(round_5_media$tv)
r5.newspaper <- length(which(round_5_media$newspaper != "Never")) / length(round_5_media$newspaper)
r5.internet <- length(which(round_5_media$internet != "Never")) / length(round_5_media$internet)

# Create table with round 6 media consumption rates
round_6_media <- dplyr::select(myData, radio, tv, newspaper, internet)
  #make sure these are not as.numeric

r6.radio <- length(which(round_6_media$radio != "Never")) / length(round_6_media$radio)
r6.tv <- length(which(round_6_media$tv != "Never")) / length(round_6_media$tv)
r6.newspaper <- length(which(round_6_media$newspaper != "Never")) / length(round_6_media$newspaper)
r6.internet <- length(which(round_6_media$internet != "Never")) / length(round_6_media$internet)

# Generate new table with round 5 and round 6 consumption rates
my_table <- matrix(c(100*r5.radio,100*r6.radio, 100*r5.tv,100*r6.tv, 
                     100*r5.newspaper,100*r6.newspaper, 100*r5.internet,100*r6.internet), 
                   nrow = 4, ncol=2, byrow=TRUE,
                   dimnames = list(c("Radio", "TV", "Newspaper", "Internet"), 
                                   c("Round 5 (2011-2013)", "Round 6 (2015-2016)")))

# Calculate percent change between years for each medium
radio.change <- ((r6.radio-r5.radio)/r5.radio)*100
tv.change <- ((r6.tv-r5.tv)/r5.tv)*100
newspaper.change <- ((r6.newspaper-r5.newspaper)/r5.newspaper)*100
internet.change <- ((r6.internet-r5.internet)/r5.internet)*100

pct_chg_table <- matrix(c(radio.change,  "Radio",
                          tv.change, "TV",
                          newspaper.change, "Newspaper",
                          internet.change, "Internet"),
                        nrow = 4, ncol=2, byrow=TRUE,
                        dimnames = list(c("1", "2", "3", "4"), 
                                        c("value", "medium")))
pct_chg_table <- as.data.frame(pct_chg_table)
pct_chg_table$value <- as.character(pct_chg_table$value)
pct_chg_table$value <- as.numeric(pct_chg_table$value)

pct_chg_table$value <- round(pct_chg_table$value, 2)
dimnames(pct_chg_table) <- list(c("1", "2", "3", "4"), 
                                c("% Change", "medium"))

pct_chg_table$medium <- factor(pct_chg_table$medium,         # set levels
                        levels = c("Radio",
                                   "TV",
                                   "Newspaper",
                                   "Internet")) 

my_table <- round(as.data.frame(my_table), 2) #pull the table I created above w/ R5 & R6 data
media_chg_table <- bind_cols(my_table, pct_chg_table) #combine it with the %change table
media_chg_table <- media_chg_table[,c(4,1,2,3)] #reorder the columns
media_chg_table <- as.matrix(media_chg_table)

# Latex output (Table 1 in Main Text)
stargazer(media_chg_table, digits = 2, label = "r5_r6_media",
          title = "Per cent of Afrobarometer Respondents who Consume Medium at least Once per Month" )
